/*
 * Copyright 2012 James Moger
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.moxie.proxy;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.lang.reflect.Method;
import java.text.MessageFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Queue;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import org.moxie.IMavenCache;
import org.moxie.MoxieCache;
import org.moxie.Pom;
import org.moxie.PomReader;
import org.moxie.RemoteRepository;
import org.moxie.utils.FileUtils;
import org.moxie.utils.StringUtils;

/**
 * The Lucene executor handles indexing and searching POM files.
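 * <p>
 * A minimal usage sketch. The {@code config} instance is supplied by the
 * hosting proxy server; {@code pomFile} and the repository name
 * {@code "central"} are illustrative assumptions:
 * <pre>{@code
 * LuceneExecutor lucene = new LuceneExecutor(config);
 * lucene.reindex();                 // blocking full rebuild of all indexes
 * lucene.index(pomFile);            // queue a POM for incremental indexing
 * List<SearchResult> hits = lucene.search("commons", 1, 25, "central");
 * lucene.close();                   // release all writers and searchers
 * }</pre>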
 *
 * @author James Moger
 *
 */
public class LuceneExecutor implements Runnable {

	private static final int INDEX_VERSION = 1;

	private static final String FIELD_PACKAGING = "type";
	private static final String FIELD_GROUPID = "groupid";
	private static final String FIELD_ARTIFACTID = "artifactid";
	private static final String FIELD_VERSION = "version";
	private static final String FIELD_NAME = "name";
	private static final String FIELD_DESCRIPTION = "description";
	private static final String FIELD_DATE = "date";

	private static final String LUCENE_DIR = "lucene";
	private static final String CONF_VERSION = "version";

	private static final Version LUCENE_VERSION = Version.LUCENE_35;

	private final Logger logger = Logger.getLogger(LuceneExecutor.class.getSimpleName());

	private final ProxyConfig config;
	private final File indexesFolder;

	private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
	private final Map<String, IndexWriter> writers = new ConcurrentHashMap<String, IndexWriter>();

	private final Queue<IndexPom> queue;

	public LuceneExecutor(ProxyConfig config) {
		this.config = config;
		this.indexesFolder = new File(config.getMoxieRoot(), LUCENE_DIR);
		queue = new ConcurrentLinkedQueue<IndexPom>();
	}

	/**
	 * Reindex is a blocking call which synchronously rebuilds each repository's
	 * Lucene index.
	 */
	public synchronized void reindex() {
		for (String repository : config.getLocalRepositories()) {
			index(repository);
		}
		for (RemoteRepository repository : config.getRemoteRepositories()) {
			index(repository.id);
		}
		System.gc();
	}

	/**
	 * Run is executed by a scheduled executor service at a fixed rate. This
	 * guarantees no concurrent repository index updates. Index updates are
	 * queued and processed asynchronously by the executor service.
	 */
	@Override
	public void run() {
		if (queue.isEmpty()) {
			return;
		}
		Set<String> repositories = new TreeSet<String>();
		long minDiff = 60 * 1000L; // 1 min
		while (!queue.isEmpty()) {
			IndexPom pom = queue.peek();
			// Wait until the oldest element has been in the queue for the
			// minimum time.
			//
			// This is a practical workaround for expecting parent pom files
			// to have been retrieved. The alternative is to make the proxy
			// smart enough to identify and retrieve parent poms. The current
			// design relies on the client instructing the proxy to retrieve
			// parent poms.
			while ((System.currentTimeMillis() - pom.date.getTime()) < minDiff) {
				try {
					Thread.sleep(200);
				} catch (InterruptedException e) {
					// ignore
				}
			}
			queue.poll();
			logger.info("indexing " + pom.file);
			incrementalIndex(pom.file);

			// cache the repository that we just indexed
			String repository = config.getRepositoryId(pom.file);
			repositories.add(repository);
		}

		// create/update the prefix indexes for the repositories
		for (String repository : repositories) {
			IMavenCache cache = config.getMavenCache(repository);
			cache.updatePrefixesIndex();
		}
	}
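	/*
	 * Scheduling sketch for the contract described on run() above. The
	 * wiring is an assumption about the hosting proxy server, not part of
	 * this class; names are illustrative:
	 *
	 *   ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
	 *   scheduler.scheduleAtFixedRate(luceneExecutor, 1, 2, TimeUnit.MINUTES);
	 *
	 * scheduleAtFixedRate() never overlaps executions of the same task,
	 * which is what guarantees run() does not race with itself while it
	 * drains the queue.
	 */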
	/**
	 * Reads the Lucene config file for the repository to check the index
	 * version. If the index version is different, then rebuild the repository
	 * index.
	 *
	 * @param repository
	 * @return true if the on-disk index format is different than INDEX_VERSION
	 */
	private boolean shouldReindex(String repository) {
		try {
			File folder = new File(indexesFolder, repository);
			File file = new File(folder, "config.properties");
			Properties props = new Properties();
			FileReader reader = new FileReader(file);
			try {
				props.load(reader);
			} finally {
				reader.close();
			}
			int indexVersion = Integer.parseInt(props.getProperty(CONF_VERSION, "0"));
			// reindex if versions do not match
			return indexVersion != INDEX_VERSION;
		} catch (Throwable t) {
			// missing or unreadable config file: fall through and rebuild
		}
		return true;
	}
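	/*
	 * shouldReindex() expects a plain java.util.Properties file. A sketch
	 * of the content it looks for (only the version key is read; a missing
	 * or unreadable file forces a rebuild):
	 *
	 *   version=1
	 */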
	/**
	 * Synchronously indexes a repository. This may build a complete index of a
	 * repository or it may update an existing index.
	 *
	 * @param repository
	 *            the name of the repository
	 */
	private void index(String repository) {
		try {
			if (shouldReindex(repository)) {
				// (re)build the entire index
				IndexResult result = reindex(repository);
				if (result.success) {
					if (result.artifactCount > 0) {
						String msg = "Built {0} Lucene index from {1} artifacts in {2} secs";
						logger.info(MessageFormat.format(msg, repository, result.artifactCount, result.duration()));
					}
				} else {
					String msg = "Could not build {0} Lucene index!";
					logger.severe(MessageFormat.format(msg, repository));
				}
			} else {
				// update the index with latest artifacts
				IndexResult result = updateIndex(repository);
				if (result.success) {
					if (result.artifactCount > 0) {
						String msg = "Updated {0} Lucene index with {1} artifacts in {2} secs";
						logger.info(MessageFormat.format(msg, repository, result.artifactCount, result.duration()));
					}
				} else {
					String msg = "Could not update {0} Lucene index!";
					logger.severe(MessageFormat.format(msg, repository));
				}
			}

			// create/update the prefix indexes for the repositories
			IMavenCache cache = config.getMavenCache(repository);
			cache.updatePrefixesIndex();
		} catch (Throwable t) {
			logger.log(Level.SEVERE, MessageFormat.format("Lucene indexing failure for {0}", repository), t);
		}
	}

	/**
	 * Updates a repository index incrementally from the last indexed artifacts.
	 *
	 * @param repository
	 * @return IndexResult
	 */
	private IndexResult updateIndex(String repository) {
		// incremental updates are not implemented yet; the returned result is
		// not marked successful, so index() logs the update as a failure
		IndexResult result = new IndexResult();
		return result;
	}

	/**
	 * Close the writer/searcher objects for a repository.
	 *
	 * @param repositoryName
	 */
	public synchronized void close(String repositoryName) {
		try {
			IndexSearcher searcher = searchers.remove(repositoryName);
			if (searcher != null) {
				searcher.getIndexReader().close();
			}
		} catch (Exception e) {
			logger.log(Level.SEVERE, "Failed to close index searcher for " + repositoryName, e);
		}
		try {
			IndexWriter writer = writers.remove(repositoryName);
			if (writer != null) {
				writer.close();
			}
		} catch (Exception e) {
			logger.log(Level.SEVERE, "Failed to close index writer for " + repositoryName, e);
		}
	}

	/**
	 * Close all Lucene indexers.
	 */
	public synchronized void close() {
		// close all writers
		for (String writer : writers.keySet()) {
			try {
				writers.get(writer).close(true);
			} catch (Throwable t) {
				logger.log(Level.SEVERE, "Failed to close Lucene writer for " + writer, t);
			}
		}
		writers.clear();

		// close all searchers
		for (String searcher : searchers.keySet()) {
			try {
				searchers.get(searcher).getIndexReader().close();
			} catch (Throwable t) {
				logger.log(Level.SEVERE, "Failed to close Lucene searcher for " + searcher, t);
			}
		}
		searchers.clear();
	}

	/**
	 * Deletes the Lucene index for the specified repository.
	 *
	 * @param repositoryName
	 * @return true, if successful
	 */
	public boolean deleteIndex(String repositoryName) {
		// close any open writer/searcher
		close(repositoryName);

		// delete the index folder
		File luceneIndex = new File(indexesFolder, repositoryName);
		if (luceneIndex.exists()) {
			FileUtils.delete(luceneIndex);
		}
		return true;
	}

	/**
	 * This completely indexes the repository and will destroy any existing
	 * index.
	 *
	 * @param repository
	 * @return IndexResult
	 */
	public IndexResult reindex(String repository) {
		IndexResult result = new IndexResult();
		if (!deleteIndex(repository)) {
			return result;
		}
		try {
			MoxieCache moxieCache = config.getMoxieCache();
			IMavenCache repositoryCache = config.getMavenCache(repository);
			Collection<File> files = repositoryCache.getFiles("." + org.moxie.Constants.POM);
			IndexWriter writer = getIndexWriter(repository);
			for (File pomFile : files) {
				try {
					Pom pom = PomReader.readPom(moxieCache, pomFile);
					String date = DateTools.timeToString(pomFile.lastModified(), Resolution.MINUTE);

					Document doc = new Document();
					doc.add(new Field(FIELD_PACKAGING, pom.packaging, Store.YES, Index.NOT_ANALYZED_NO_NORMS));
					doc.add(new Field(FIELD_GROUPID, pom.groupId, Store.YES, Index.ANALYZED));
					doc.add(new Field(FIELD_ARTIFACTID, pom.artifactId, Store.YES, Index.ANALYZED));
					doc.add(new Field(FIELD_VERSION, pom.version, Store.YES, Index.ANALYZED));
					if (!StringUtils.isEmpty(pom.name)) {
						doc.add(new Field(FIELD_NAME, pom.name, Store.YES, Index.ANALYZED));
					}
					if (!StringUtils.isEmpty(pom.description)) {
						doc.add(new Field(FIELD_DESCRIPTION, pom.description, Store.YES, Index.ANALYZED));
					}
					doc.add(new Field(FIELD_DATE, date, Store.YES, Index.ANALYZED));

					// add the pom to the index
					writer.addDocument(doc);
					// only count poms that were actually indexed
					result.artifactCount++;
				} catch (Exception e) {
					logger.log(Level.SEVERE, MessageFormat.format("Exception while reindexing {0} in {1}", pomFile, repository), e);
				}
			}
			writer.commit();
			resetIndexSearcher(repository);
			result.success();
		} catch (Exception e) {
			logger.log(Level.SEVERE, "Exception while reindexing " + repository, e);
		}
		return result;
	}
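	/*
	 * Index document sketch: for a hypothetical POM with the coordinates
	 * org.example:widget:1.2.3, reindex() and incrementalIndex() build a
	 * Lucene document along these lines:
	 *
	 *   type=jar, groupid=org.example, artifactid=widget, version=1.2.3,
	 *   name=Widget, description=..., date=201207141830
	 *
	 * The date field is DateTools.timeToString() at MINUTE resolution
	 * (yyyyMMddHHmm), so lexicographic order matches chronological order;
	 * the date-descending sort in search() relies on this.
	 */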
	/**
	 * Queues a pom file for an incremental index update.
	 *
	 * @param pomFile
	 *            the pom file to index
	 */
	public void index(File pomFile) {
		queue.add(new IndexPom(pomFile));
	}

	/**
	 * Incrementally updates the index with a single pom file, replacing any
	 * existing document for the same group/artifact/version.
	 *
	 * @param pomFile
	 *            the pom file to index
	 */
	private void incrementalIndex(File pomFile) {
		try {
			String repository = config.getRepositoryId(pomFile);
			IMavenCache cache = config.getMavenCache(repository);
			Pom pom = PomReader.readPom(cache, pomFile);
			delete(repository, pom);

			IndexWriter writer = getIndexWriter(repository);
			Document doc = new Document();
			doc.add(new Field(FIELD_PACKAGING, pom.packaging, Store.YES, Index.NOT_ANALYZED_NO_NORMS));
			doc.add(new Field(FIELD_GROUPID, pom.groupId, Store.YES, Index.ANALYZED));
			doc.add(new Field(FIELD_ARTIFACTID, pom.artifactId, Store.YES, Index.ANALYZED));
			doc.add(new Field(FIELD_VERSION, pom.version, Store.YES, Index.ANALYZED));
			if (!StringUtils.isEmpty(pom.name)) {
				doc.add(new Field(FIELD_NAME, pom.name, Store.YES, Index.ANALYZED));
			}
			if (!StringUtils.isEmpty(pom.description)) {
				doc.add(new Field(FIELD_DESCRIPTION, pom.description, Store.YES, Index.ANALYZED));
			}
			String date = DateTools.timeToString(pomFile.lastModified(), Resolution.MINUTE);
			doc.add(new Field(FIELD_DATE, date, Store.YES, Index.ANALYZED));

			// add the pom to the index
			writer.addDocument(doc);
			writer.commit();
			resetIndexSearcher(repository);
			config.resetRepositorySize(repository);
		} catch (Exception e) {
			logger.log(Level.SEVERE, "Exception while indexing " + pomFile, e);
		}
	}

	/**
	 * Deletes any existing documents for the pom's group/artifact/version
	 * from the repository index.
	 *
	 * @param repository
	 * @param pom
	 * @return true if documents were deleted
	 */
	private boolean delete(String repository, Pom pom) throws IOException {
		BooleanQuery query = new BooleanQuery();
		Term groupTerm = new Term(FIELD_GROUPID, pom.groupId);
		query.add(new TermQuery(groupTerm), Occur.MUST);
		Term artifactTerm = new Term(FIELD_ARTIFACTID, pom.artifactId);
		query.add(new TermQuery(artifactTerm), Occur.MUST);
		Term versionTerm = new Term(FIELD_VERSION, pom.version);
		query.add(new TermQuery(versionTerm), Occur.MUST);

		IndexWriter writer = getIndexWriter(repository);
		int numDocsBefore = writer.numDocs();
		writer.deleteDocuments(query);
		writer.commit();
		int numDocsAfter = writer.numDocs();
		if (numDocsBefore == numDocsAfter) {
			logger.fine(MessageFormat.format("no records found to delete {0}", query.toString()));
			return false;
		} else {
			logger.fine(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString()));
			return true;
		}
	}

	private SearchResult createSearchResult(Document doc, int hitId, int totalHits) throws ParseException {
		SearchResult result = new SearchResult();
		result.hitId = hitId;
		result.totalHits = totalHits;
		result.date = DateTools.stringToDate(doc.get(FIELD_DATE));
		result.groupId = doc.get(FIELD_GROUPID);
		result.artifactId = doc.get(FIELD_ARTIFACTID);
		result.version = doc.get(FIELD_VERSION);
		result.packaging = doc.get(FIELD_PACKAGING);
		result.name = doc.get(FIELD_NAME);
		result.description = doc.get(FIELD_DESCRIPTION);
		return result;
	}

	private synchronized void resetIndexSearcher(String repository) throws IOException {
		IndexSearcher searcher = searchers.remove(repository);
		if (searcher != null) {
			searcher.getIndexReader().close();
		}
	}
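	/*
	 * Searcher lifecycle note: getIndexSearcher() below opens its reader
	 * from the repository's IndexWriter, which in Lucene 3.5 yields a
	 * point-in-time, near-real-time reader; it does not see documents
	 * added after it was opened. That is why reindex() and
	 * incrementalIndex() call resetIndexSearcher() after committing: the
	 * stale searcher is discarded and the next search lazily opens a
	 * fresh one.
	 */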
	/**
	 * Gets an index searcher for the repository.
	 *
	 * @param repository
	 * @return an IndexSearcher
	 * @throws IOException
	 */
	private IndexSearcher getIndexSearcher(String repository) throws IOException {
		IndexSearcher searcher = searchers.get(repository);
		if (searcher == null) {
			IndexWriter writer = getIndexWriter(repository);
			searcher = new IndexSearcher(IndexReader.open(writer, true));
			searchers.put(repository, searcher);
		}
		return searcher;
	}

	/**
	 * Gets an index writer for the repository. The index will be created if it
	 * does not already exist.
	 *
	 * @param repository
	 * @return an IndexWriter
	 * @throws IOException
	 */
	private IndexWriter getIndexWriter(String repository) throws IOException {
		IndexWriter indexWriter = writers.get(repository);
		if (indexWriter == null) {
			File indexFolder = new File(indexesFolder, repository);
			if (!indexFolder.exists()) {
				indexFolder.mkdirs();
			}
			Directory directory = FSDirectory.open(indexFolder);
			StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
			IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
			config.setOpenMode(OpenMode.CREATE_OR_APPEND);
			indexWriter = new IndexWriter(directory, config);
			writers.put(repository, indexWriter);
		}
		return indexWriter;
	}

	/**
	 * Searches the specified repositories for the given text or query.
	 *
	 * @param text
	 *            if the text is null or empty, null is returned
	 * @param page
	 *            the page number to retrieve. page is 1-indexed.
	 * @param pageSize
	 *            the number of elements to return for this page
	 * @param repositories
	 *            a list of repositories to search. if no repositories are
	 *            specified null is returned.
	 * @return a list of SearchResults ordered from newest to oldest
	 */
	public List<SearchResult> search(String text, int page, int pageSize, List<String> repositories) {
		if (repositories == null || repositories.size() == 0) {
			return null;
		}
		return search(text, page, pageSize, repositories.toArray(new String[0]));
	}
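	/*
	 * Query syntax sketch: the text is parsed by QueryParser against both
	 * the groupid and artifactid fields with leading wildcards enabled, so
	 * plain terms, wildcard expressions, and field-qualified queries all
	 * work. Illustrative calls (repository names are hypothetical):
	 *
	 *   search("commons-lang", 1, 25, "central");
	 *   search("artifactid:*-client", 1, 25, "central", "snapshots");
	 */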
	/**
	 * Searches the specified repositories for the given text or query.
	 *
	 * @param text
	 *            if the text is null or empty, null is returned
	 * @param page
	 *            the page number to retrieve. page is 1-indexed.
	 * @param pageSize
	 *            the number of elements to return for this page
	 * @param repositories
	 *            a list of repositories to search. if no repositories are
	 *            specified null is returned.
	 * @return a list of SearchResults ordered from newest to oldest
	 */
	public List<SearchResult> search(String text, int page, int pageSize, String... repositories) {
		if (StringUtils.isEmpty(text)) {
			return null;
		}
		if (repositories == null || repositories.length == 0) {
			return null;
		}
		Set<SearchResult> results = new LinkedHashSet<SearchResult>();
		StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
		try {
			// default search checks groupId and artifactId
			BooleanQuery query = new BooleanQuery();
			QueryParser qp;

			qp = new QueryParser(LUCENE_VERSION, FIELD_GROUPID, analyzer);
			qp.setAllowLeadingWildcard(true);
			query.add(qp.parse(text), Occur.SHOULD);

			qp = new QueryParser(LUCENE_VERSION, FIELD_ARTIFACTID, analyzer);
			qp.setAllowLeadingWildcard(true);
			query.add(qp.parse(text), Occur.SHOULD);

			IndexSearcher searcher;
			if (repositories.length == 1) {
				// single repository search
				searcher = getIndexSearcher(repositories[0]);
			} else {
				// multiple repository search
				List<IndexReader> readers = new ArrayList<IndexReader>();
				for (String repository : repositories) {
					IndexSearcher repositoryIndex = getIndexSearcher(repository);
					readers.add(repositoryIndex.getIndexReader());
				}
				IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
				MultiSourceReader reader = new MultiSourceReader(rdrs);
				searcher = new IndexSearcher(reader);
			}

			Query rewrittenQuery = searcher.rewrite(query);
			Sort sort = new Sort(new SortField(FIELD_DATE, SortField.STRING, true));
			TopFieldDocs topDocs = searcher.search(rewrittenQuery, 10000, sort);
			int offset = Math.max(0, (page - 1) * pageSize);
			ScoreDoc[] hits = topDocs.scoreDocs;
			int totalHits = topDocs.totalHits;
			if (pageSize <= 0) {
				pageSize = totalHits;
			}
			if (totalHits > offset) {
				for (int i = offset, len = Math.min(offset + pageSize, hits.length); i < len; i++) {
					int docId = hits[i].doc;
					Document doc = searcher.doc(docId);
					SearchResult result = createSearchResult(doc, i + 1, totalHits);
					if (repositories.length == 1) {
						// single repository search
						result.repository = repositories[0];
					} else {
						// multi-repository search
						MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader();
						int index = reader.getSourceIndex(docId);
						result.repository = repositories[index];
					}
					results.add(result);
				}
			}
		} catch (Exception e) {
			logger.log(Level.SEVERE, MessageFormat.format("Exception while searching for {0}", text), e);
		}
		return new ArrayList<SearchResult>(results);
	}

	/**
	 * Simple class to track the results of an index update.
	 */
	private class IndexResult {
		long startTime = System.currentTimeMillis();
		long endTime = startTime;
		boolean success;
		int artifactCount;

		void success() {
			success = true;
			endTime = System.currentTimeMillis();
		}

		float duration() {
			return (endTime - startTime) / 1000f;
		}
	}

	/**
	 * Custom subclass of MultiReader to identify the source index for a given
	 * doc id. This would not be necessary if there were a public method to
	 * obtain this information.
	 */
	private class MultiSourceReader extends MultiReader {

		final Method method;

		MultiSourceReader(IndexReader[] subReaders) {
			super(subReaders);
			Method m = null;
			try {
				// readerIndex(int) is not public in MultiReader, so it is
				// accessed via reflection
				m = MultiReader.class.getDeclaredMethod("readerIndex", int.class);
				m.setAccessible(true);
			} catch (Exception e) {
				logger.log(Level.SEVERE, "Error getting readerIndex method", e);
			}
			method = m;
		}

		int getSourceIndex(int docId) {
			int index = -1;
			try {
				Object o = method.invoke(this, docId);
				index = (Integer) o;
			} catch (Exception e) {
				logger.log(Level.SEVERE, "Error getting source index", e);
			}
			return index;
		}
	}

	private class IndexPom {
		final File file;
		final Date date;

		IndexPom(File file) {
			this.file = file;
			this.date = new Date();
		}
	}
}